#!/home/chunwei/chunenv/bin/python
# -*- coding: utf-8 -*-
import sys
import pandas as pd
import numpy as np
import datetime as d
import argparse
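'''
Analyze the share of each event type in a user's activity.

Generated features (one column per event type <name>):

event_<name>_count.ratio: the event's count divided by the overall event count
event_<name>_count.latest.5.ratio: the same ratio over the latest 5 days
event_<name>_count.latest.10.ratio: the same ratio over the latest 10 days
event_overall_count.latest.N.ratio: latest-N overall count divided by the
    all-time overall count (N = 5, 10)
'''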

def parse_args():
    """Parse the command line and return it as a plain dict.

    The returned dict has two keys, 'count_path' and 'output', one per
    positional argument.  When the script is started without any argument,
    '-h' is appended to sys.argv so that argparse prints the help text and
    exits instead of reporting missing arguments.
    """
    invoked_without_args = len(sys.argv) == 1
    if invoked_without_args:
        sys.argv.append('-h')

    cli = argparse.ArgumentParser()
    for positional in ('count_path', 'output'):
        cli.add_argument(positional)
    namespace = cli.parse_args()
    return vars(namespace)

# Script body: read the event-count CSV named on the command line, derive one
# ratio column per count column, and write the result to the output path.
args = parse_args()
count_path = args['count_path']
output = args['output']

count = pd.read_csv(count_path)

out_data = pd.DataFrame()
out_data['enrollment_id'] = count['enrollment_id']

# NOTE(review): "nagivate" looks like a misspelling of "navigate".  It is kept
# unchanged because it is used to build the input column names below -- confirm
# against whatever produces the count CSV before renaming.
events = ["problem", "video", "access", "wiki", "discussion", "nagivate", "page_close"]

# Columns are read with count[...] rather than getattr(): several names contain
# dots, and bracket indexing does not depend on attribute-style fallback.
# Rows whose denominator is 0 are not special-cased in any of the divisions.

# Whole time span: each event count divided by the overall event count.
overall_count = count["event_overall_count"]
for event in events:
    key = "event_%s_count" % event
    out_key = key + ".ratio"
    out_data[out_key] = count[key] / overall_count

# Latest N days: each event count divided by the overall count of that window.
for N in (5, 10):
    overall_count = count["event_overall_count.latest.%d" % N]
    for event in events:
        key = "event_%s_count.latest.%d" % (event, N)
        out_key = key + ".ratio"
        out_data[out_key] = count[key] / overall_count

# Latest N days: the window's overall count as a share of the all-time overall count.
for N in (5, 10):
    key = "event_overall_count.latest.%d" % N
    out_data[key + ".ratio"] = count[key] / count["event_overall_count"]

# Single-argument print(...) behaves the same under Python 2 and Python 3.
# The two spaces after "to" reproduce the output of the original
# `print 'output to ', output` statement.
print(out_data.head())
print('output to  %s' % output)

out_data.to_csv(output, index=False)
